home *** CD-ROM | disk | FTP | other *** search
/ The 640 MEG Shareware Studio 2 / The 640 Meg Shareware Studio CD-ROM Volume II (Data Express)(1993).ISO / os2 / pccts.zip / DECL.G < prev    next >
Text File  |  1992-12-08  |  15KB  |  564 lines

  1. /*
  2.  * ANSI C recognizer
  3.  *
  4.  * Gives some error messages for semantics, but this grammar
  5.  * checks mostly syntax.  We make no claim that it rigorously follows
  6.  * the ANSI C standard, but it's a good start.
  7.  *
  8.  * Type trees are constructed and maintained in the symbol table.
  9.  * Expression trees are constructed and then thrown away.  The
  10.  * user can presumably do something more useful with them.
  11.  *
  12.  * Requires PCCTS Version 1.00
  13.  *
  14.  * Terence Parr
  15.  * July 1991
  16.  */
  17.  
  18. #header <<
  19.     #define D_TextSize 20    /* NOTE(review): presumably sizes the attribute text buffer used via charbuf.h -- confirm */
  20.     #include "charbuf.h"
  21.     #include "type.h"
  22.     #include "sym.h"
  23.     #include "proto.h"
  24. >>
  25.  
  26. #token "[\ \t]+"    << zzskip(); /* blanks and tabs are not passed to the parser */ >>
  27. #token "\n"            << zzline++; zzskip(); /* count the line, then drop the newline */ >>
  28.  
  29. #token "#line [\ \t]+ [0-9]+ ~[\n]*\n"
  30.                     << zzline = atoi(zzlextext+5); /* +5 steps over "#line"; atoi skips the blanks */ zzskip(); >>
  31. #token "# [\ \t]+ [0-9]+ ~[\n]*\n"
  32.                     << zzline = atoi(zzlextext+1); /* +1 steps over "#" */ zzskip(); >>
  33.  
  34. #token "\""            << zzmode(STRINGS); zzmore(); /* opening quote: keep scanning in lexclass STRINGS */ >>
  35. #token "'"            << zzmode(CHARACTERS); zzmore(); /* opening quote: keep scanning in lexclass CHARACTERS */ >>
  36.  
  37. /* Token types declared without a pattern: used only as AST node types, never referenced in the grammar */
  38. #token Var
  39. #token Func
  40. #token FuncCall
  41. #token Label
  42. #token PostInc
  43. #token PostDec
  44. #token StructPtrRef
  45. #token StructRef
  46. #token AggrTag
  47.  
  48. #lexclass STRINGS
  49. #token STRING "\""    << zzmode(START); /* closing quote: the accumulated text becomes one STRING token */ >>
  50. #token "\\\""        << zzmore(); /* an escaped quote does not end the string */ >>
  51. #token "\\n"        << zzreplchar('\n'); zzmore(); >>
  52. #token "\\r"        << zzreplchar('\r'); zzmore(); >>
  53. #token "\\t"        << zzreplchar('\t'); zzmore(); >>
  54. #token "\\[1-9][0-9]*"
  55.                     << /* zzbegexpr[0] is the backslash; convert the digits after it */ zzreplchar((char)strtol(zzbegexpr+1,NULL,10)); zzmore(); >>
  56. #token "\\0[0-7]*"    << zzreplchar((char)strtol(zzbegexpr+1,NULL,8)); zzmore(); >>
  57. #token "\\0x[0-9A-Fa-f]+"    << /* strtol base 16 accepts the 0x prefix */ zzreplchar((char)strtol(zzbegexpr+1,NULL,16)); zzmore(); >>
  58. #token "\\~[\n\r]"    << zzmore(); /* any other escape is kept as written */ >>
  59. #token "[\n\r]"        << zzline++; zzmore(); /* print warning about \n in str */>>
  60. #token "~[\"\n\r\\]+"<< zzmore(); >>
  61.  
  62. #lexclass CHARACTERS
  63. #token CHARACTER    "'" << zzmode(START); /* closing quote: the accumulated text becomes one CHARACTER token */ >>
  64. #token "\\'"        << zzmore(); /* an escaped quote does not end the constant */ >>
  65. #token "\\n"        << zzreplchar('\n'); zzmore(); >>
  66. #token "\\r"        << zzreplchar('\r'); zzmore(); >>
  67. #token "\\t"        << zzreplchar('\t'); zzmore(); >>
  68. #token "\\[1-9][0-9]*"
  69.                     << /* zzbegexpr[0] is the backslash; convert the digits after it */ zzreplchar((char)strtol(zzbegexpr+1,NULL,10)); zzmore(); >>
  70. #token "\\0[0-7]*"    << zzreplchar((char)strtol(zzbegexpr+1,NULL,8)); zzmore(); >>
  71. #token "\\0x[0-9A-Fa-f]+"    << /* strtol base 16 accepts the 0x prefix */ zzreplchar((char)strtol(zzbegexpr+1,NULL,16)); zzmore(); >>
  72. #token "\\~[\n\r]"    << zzmore(); /* any other escape is kept as written */ >>
  73. #token "[\n\r]"        << zzline++; zzmore(); /* print warning about \n in char constant */>>
  74. #token "~[\'\n\r\\]"<< zzmore(); >>
  75.  
  76. #lexclass START
  77. /* Numeric literals (each L_ variant is the same pattern plus an [Ll] suffix), then a few named operator/keyword tokens. */
  78. #token OCT_NUM "[0][0-7]*"
  79. #token L_OCT_NUM "[0][0-7]*[Ll]"
  80. #token INT_NUM "[1-9][0-9]*"
  81. #token L_INT_NUM "[1-9][0-9]*[Ll]"
  82. #token HEX_NUM "[0][Xx][0-9A-Fa-f]+"
  83. #token L_HEX_NUM "[0][Xx][0-9A-Fa-f]+[Ll]"
  84. #token FNUM "([1-9][0-9]*{.[0-9]*} | {[0]}.[0-9]+) {[Ee]{[\+\-]}[0-9]+}"
  85. #token PreInc "\+\+"
  86. #token PreDec "\-\-"
  87. #token LPAREN "\("
  88. #token LBRACK "\["
  89. #token SizeOf "sizeof"
  90.  
  91. globals!:    <<AST *base, *t; Sym *p;
  92.               zzs_scope(&Globals);>>
  93. /* Start rule: any number of top-level items, then "@"; Globals is made the current scope first. */
  94.             (    <<;>>
  95.                 <<Params=NULL;>> decl[GLOBAL]
  96.             |    <<Params=NULL;>>
  97.                 <<base = #[BaseTypeQ,scNone,0,tInt,NULL]; /* no type written: base type is int. NOTE(review): other rules pass cv before sc here -- confirm scNone is intended in this slot */>>
  98.                 declarator[base]
  99.                 <<handleSymbol(scNone, $1.text, #1, NULL, GLOBAL);
  100.                   t = defineArgs(#1, &Params);
  101.                 >>
  102.                 func_def[t]
  103.                 <<english( #(#[SymQ,$1.text,#2], #1) );>>
  104.                 <<Proto($1.text, #1);
  105.                   p = zzs_rmscope(&Params); /* the parameter scope ends with the function body */
  106.                   pScope(p, "parameters\n");
  107.                 >>
  108.             )*
  109. /* After the last item: remove the Globals scope and hand its symbols to pScope and ProtoVars. */
  110.             <<p = zzs_rmscope(&Globals);
  111.               pScope(p, "globals\n");
  112.               ProtoVars(p);>>
  113.             "@"
  114.         ;
  115.  
  116. /* d e c l  --  recognize a declaration or definition.
  117.  *
  118.  * We handle typedefs in a bizarre way.  WORD's are converted
  119.  * to TypeName's inside the lexical action for token WORD.  So,
  120.  * because of the lookahead, we need to get a TypeName into
  121.  * the symbol table before the lookahead can get a reference
  122.  * to this. e.g. "typedef int I; I i;"  We actually add the typedef
  123.  * name to the symbol table when we see its definition in
  124.  * rule declaration and friends.  Aggregate tags are handled in a
  125.  * similar fashion by adding them to the symbol table as they
  126.  * are declared.
  127.  *
  128.  * Function definitions always have a FunctionQ node at the root
  129.  * of the declarator since anything in front would make a pointer to
  130.  * a function or whatever.  e.g. int *f(); --> () * int --> "function
  131.  * returning pointer to integer."  Or, int (*f)() --> * () int -->
  132.  * "pointer to function returning integer."  The first is a function
  133.  * symbol, the 2nd is a variable.
  134.  */
  135. decl![int level]    /* level is stored in aggregate/enum tag symbols and forwarded to handleSymbol */
  136.         :    <<int sc=scNone, t=tInt, cv=cvNone;
  137.               AST *base, *d, *init=NULL, *tr;
  138.               char *w;
  139.               Sym *p;>>
  140.             (    (sclass[&sc] | typeq[&cv])+
  141.                 (    type[&t]        <<base = #[BaseTypeQ,cv,sc,t,$1.text];>>
  142.                 |    aggr[sc,cv]        <<base = #1;>>
  143.                 |    enum_def        <<base = #1;>>
  144.                 |                    <<base = #[BaseTypeQ,cv,sc,tInt,NULL]; /* only sclass/typeq given: type is int */>>
  145.                 )
  146.             |    type[&t]            <<base = #[BaseTypeQ,cvNone,scNone,t,$1.text];>>
  147.             |    aggr[scNone,cvNone]    <<base = #1;>>
  148.             |    enum_def            <<base = #1;>>
  149.             )
  150.             (    declarator[base]    <<d=#1; w=$1.text;>>
  151.                 (    { <<init=NULL;>> "=" initialize <<init=#2;>> }
  152.                     <<if ( d->nodeType == FunctionQ ) { /* a function declaration is marked extern */
  153.                         sc |= scExtern;
  154.                         bottom(d)->data.t.sc |= scExtern;
  155.                       }
  156.                       handleSymbol(sc, w, d, init, $level);>>
  157.                     (    <<english( #(#[SymQ,w,init], d) );>>
  158.                         ","
  159.                         declarator[base]
  160.                         { <<init=NULL;>> "=" initialize <<init=#2;>> }
  161.                         <<english( #(#[SymQ,$2.text,init], #2) );>>
  162.                         <<if ( #2->nodeType == FunctionQ ) {
  163.                             sc |= scExtern;
  164.                             bottom(#2)->data.t.sc |= scExtern;
  165.                           }
  166.                           handleSymbol(sc, $2.text, #2, init, $level);>>
  167.                     )*
  168.                     << /* record the declaration level on a named struct/union/enum tag */
  169.                     if ( base->data.t.type==tStruct ||
  170.                          base->data.t.type==tUnion ||
  171.                          base->data.t.type==tEnum )
  172.                     {
  173.                         if ( base->data.t.name != NULL )
  174.                         {
  175.                             p = zzs_get(base->data.t.name);
  176.                             if ( p!=NULL ) p->level = $level;
  177.                         }
  178.                     }
  179.                     >>
  180.                     ";"
  181.                 |    << /* no "=" or ";" after the declarator: a function definition follows */
  182.                         handleSymbol(sc, w, d, init, $level);
  183.                         tr = defineArgs(d, &Params);
  184.                     >>
  185.                     func_def[tr]
  186.                     <<english( #(#[SymQ,w,#1], d) );>>
  187.                     <<Proto(w, d);
  188.                       p = zzs_rmscope(&Params);
  189.                         pScope(p, "block\n");
  190.                     >>
  191.                 )
  192.             |    ";"
  193.                 <<english( base );>>
  194.                 <<if ( base->data.t.type==tStruct ||
  195.                        base->data.t.type==tUnion ||
  196.                        base->data.t.type==tEnum )
  197.                   { /* an untagged aggregate has a NULL name (see aggr); do not look it up */
  198.                     p = (base->data.t.name!=NULL) ? zzs_get(base->data.t.name) : NULL;
  199.                     if ( p!=NULL ) p->level = $level;
  200.                   }
  201.                   else
  202.                     error("missing declarator");
  203.                 >>
  204.             )
  205.         ;
  206.  
  207. sclass![int *sc]    /* match one storage-class keyword and OR its flag into *sc */
  208.         :    "auto"                <<*$sc |= scAuto;>>
  209.         |    "static"            <<*$sc |= scStatic;>>
  210.         |    "register"            <<*$sc |= scRegister;>>
  211.         |    "extern"            <<*$sc |= scExtern;>>
  212.         |    "typedef"            <<*$sc |= scTypedef;>>
  213.         ;
  214.  
  215. typeq![int *cv]    /* match one type qualifier and OR its flag into *cv (caller must initialize *cv) */
  216.         :    "const"                <<*$cv |= cvConst;>>
  217.         |    "volatile"            <<*$cv |= cvVolatile;>>
  218.         ;
  219.  
  220. type![int *t]    /* wrapper around t1: type code goes to *t, t1's attribute is passed up unchanged */
  221.         :    t1[t]                <<$type = $1;>>
  222.         ;
  223.  
  224. t1![int *type]    /* match a type specifier; the first keyword assigns *type, later ones OR into it */
  225.         :    (    "unsigned"        <<*$type = tUnsigned;>>
  226.             |    "signed"        <<*$type = tSigned;>>
  227.             )
  228.             (    "char"            <<*$type |= tChar;>>
  229.             |    { "short"        <<*$type |= tShort;>>
  230.                 | "long"        <<*$type |= tLong;>>
  231.                 }
  232.                 { "int"            <<*$type |= tInt;>>
  233.                 }
  234.             )
  235.         |    (    "short"            <<*$type = tShort;>>
  236.                 { "int"         <<*$type |= tInt;>>
  237.                 }
  238.             |    "long"            <<*$type = tLong;>>
  239.                 { "int"            <<*$type |= tInt;>>
  240.                 | "float"        <<*$type |= tFloat;>>
  241.                 | "double"        <<*$type |= tDouble;>>
  242.                 }
  243.             )
  244.         |    "void"                <<*$type = tVoid;>>
  245.         |    "char"                <<*$type = tChar;>>
  246.         |    "int"                <<*$type = tInt;>>
  247.         |    "float"                <<*$type = tFloat;>>
  248.         |    "double"            <<*$type = tDouble;>>
  249.         |    TypeName            <<*$type = tTypeName; $t1 = $1; /* only this alternative sets the rule's attribute */>>
  250.         ;
  251.  
  252. /* D e c l a r a t o r */
  253.  
  254. /*
  255.  * Build a declarator by appending the base to the bottom of the type-tree
  256.  * matched in dcltor1.  We pass the storage class to dcltor1 in case
  257.  * we have a typedef on our hands which needs to be added to the symbol
  258.  * table ASAP.
  259.  */
  260. declarator![AST *base]    /* returns the full type tree in #0 and dcltor1's attribute (the declared name) in $declarator */
  261.         :    dcltor1[bottom($base)] <<#(bottom(#1), $base); /* hang the base type under bottom(#1) */
  262.                                        #0 = (#1==NULL)?$base:#1; /* no modifiers at all: the base is the whole type */
  263.                                      $declarator = $1;>>
  264.         ;
  265.  
  266. /*
  267.  * Match *D1 or D2.  Build type-trees for PointerQ (pointer qualifier)
  268.  * via:
  269.  *
  270.  * #0 =     D1
  271.  *            |
  272.  *            v
  273.  *            *
  274.  *
  275.  * where D? is dcltor? in this grammar.
  276.  */
  277. dcltor1![AST *base]    /* base is only passed through to dcltor2 */
  278.         :    <<AST *t; int cv=0;>>
  279.             "\*"
  280.             {    "const"            <<cv=cvConst;>>
  281.             |    "volatile"        <<cv=cvVolatile;>>
  282.             }                    <<t = #[PointerQ,cv];>>
  283.             dcltor1[$base]        <<#(bottom(#3), t); #0=(#3==NULL)?t:#3; /* the pointer node goes below the inner declarator's tree */
  284.                                   $dcltor1 = $3;>>
  285.         |    dcltor2[$base]        <<#0 = #1; $dcltor1 = $1;>>
  286.         ;
  287.  
  288. /*
  289.  * For WORD D3 we return the following
  290.  *
  291.  * $$ =        WORD recognized.
  292.  * #0 =      D3                    (array or func modifier)
  293.  *
  294.  * For ( D1 ) we return
  295.  *
  296.  * $$ =        WORD recognized in D1.
  297.  * #0 =        D1                    (put stuff in (..) above [] or ())
  298.  *            |
  299.  *            v
  300.  *            D3
  301.  *
  302.  * For instance, (*f)() yields
  303.  *
  304.  * $$ =        f
  305.  * #0 =        *                    (pointer to)
  306.  *            |
  307.  *            v
  308.  *           ( )                    (a function)
  309.  *
  310.  * If storage class is scTypedef, we need to add it to the symbol table.
  311.  */
  312. dcltor2![AST *base]                /* pass in storage class for typedefs */
  313.         :    <<AST *t; Sym *n;>>
  314.             WORD                <<if ( $base->data.t.sc&scTypedef ) /* typedef name: enter it as a TypeName right away */
  315.                                     addsym(TypeName,$1.text,0,NULL,NULL);
  316.                                 >>
  317.             dcltor3                <<#0 = #2; $dcltor2 = $1;>>
  318.         |    "\(" dcltor1[$base] "\)" <<$dcltor2 = $2;>> 
  319.             dcltor3                <<#(bottom(#2), #4); #0=(#2==NULL)?#4:#2; /* the [] or () suffix goes below the parenthesized part */>>
  320.         ;
  321.  
  322. /*
  323.  * return #0 = [expr] or = [nodimension]
  324.  * or ( ) --> arg1 --> ... --> argn for a function
  325.  *
  326.  * multiple [1][2][3] yields
  327.  *
  328.  * #0 =       [1]                    (a 1-element array of)
  329.  *            |
  330.  *            v
  331.  *           [2]                    (2-element arrays of)
  332.  *            |
  333.  *            v
  334.  *           [3]                    (3-element arrays)
  335.  *            
  336.  */
  337. dcltor3!:    "\[" expr1 "\]" dcltor3    <<#0 = #( #[ArrayQ,#2], #4 ); /* sized dimension; following suffixes become its child */>>
  338.         |    "\[" "\]" dcltor3        <<#0 = #( #[ArrayQ,NULL], #3 ); /* unsized dimension */>>
  339.         |    "\(" args "\)"            <<#0 = #(NULL, #[FunctionQ], #2); /* args are siblings of the FunctionQ node */>>
  340.         |                            <<#0 = NULL; /* no array or function suffix */>>
  341.         ;
  342.  
  343. /*
  344.  * match a list of arguments.
  345.  *
  346.  * The arguments are siblings of the FunctionQ node in the type
  347.  * tree.  e.g.
  348.  *
  349.  *    [FunctionQ]-->[arg1]--> ... -->[argn]
  350.  *                     |                 |
  351.  *                     v                 v
  352.  *                 [type1]          [typen]
  353.  */
  354. args!    :    <<AST *t;>>
  355.             arg                    <<t=#1;>>
  356.             (    "," arg            <<t = #(NULL, t, #2); /* add this arg to the sibling list */>>
  357.             )*
  358.             {    "," "..."        <<t=#(NULL,t,#[BaseTypeQ,0,0,tEllipsis,NULL]); /* "..." is recorded as a tEllipsis base type */>>
  359.             }
  360.             <<#0 = t;>>
  361.         |    /* empty argument list: no action runs in this alternative */
  362.         ;
  363.  
  364. arg!    :    typename            <<#0 = #1; /* argument written with a type */>>
  365.         |    WORD                <<#0 = #[SymQ,$1.text,NULL]; /* bare name with no type */>>
  366.         ;
  367.  
  368. /*
  369.  * match a typename -- (used in type-casting and function prototypes).
  370.  * Type-trees look the same as those for decl.  But, a symbol is
  371.  * optional here because they can be used in argument lists.
  372.  */
  373. typename!:    <<int cv=cvNone, t=tInt; AST *base, *tr=NULL; /* cv must start clear: typeq ORs flags into it */>>
  374.             (    (typeq[&cv])+
  375.                 (    type[&t]    <<base = #[BaseTypeQ,cv,0,t,$1.text];>>
  376.                 |    aggr[scNone,cv]    <<base = #1;>>
  377.                 |    <<base = #[BaseTypeQ,cv,0,tInt,NULL]; /* qualifiers only: default to int, as rule decl does */>> )
  378.             |    type[&t]        <<base = #[BaseTypeQ,0,0,t,$1.text];>>
  379.             |    aggr[scNone,cvNone]    <<base = #1;>>
  380.             )
  381.             tdecl[base]            <<if ($2.text[0]!='\0') tr=#[SymQ,$2.text,NULL]; /* a name is optional; make a SymQ root only if one was given */
  382.                                   #0=#(tr, #2);>>
  383.         ;
  384.  
  385. /* A g g r e g a t e s */
  386.  
  387. /*
  388.  * match an enum definition; yield following tree:
  389.  *
  390.  * [BaseTypeQ] --> [elem1] --> ... --> [elemn] 
  391.  */
  392. enum_def!:    <<AST *base;>>
  393.             "enum" WORD            <<base=#[BaseTypeQ,0,0,tEnum,$2.text]; /* the tag WORD is required by this rule */>>
  394.             enum_lst            <<#0 = #(NULL, base, #3); /* elements become siblings of the base node */>>
  395.         ;
  396.  
  397. /*
  398.  * match a list of enumeration elements.
  399.  *
  400.  * The symbols are siblings of each other:
  401.  *
  402.  * [elem1] --> ... --> [elemn] 
  403.  *
  404.  * If an element has an initialization, store a pointer to it in the
  405.  * AST node.
  406.  */
  407. enum_lst!:    <<AST *list, *init=NULL;>>
  408.             "\{"
  409.             WORD
  410.             {    "=" expr1        <<init = #2;>>
  411.             }                    <<list = #[SymQ,$2.text, init];>>
  412.             (    ","
  413.                 WORD
  414.                 {                <<init=NULL; /* reset so an earlier element's initializer is not reused */>>
  415.                     "=" expr1    <<init=#2;>>
  416.                 }                <<list = #(NULL,list,#[SymQ,$2.text, init]);>>
  417.             )*
  418.             "\}"
  419.                                 <<#0 = list;>>
  420.         |                        <<#0 = NULL; /* no brace list: a reference to an existing enum */>>
  421.         ;
  422.  
  423. /*
  424.  * Match a struct/union def.
  425.  * Return a tree like this:
  426.  * 
  427.  *    [BaseTypeQ]-->[fld1]--> ... -->[fldn]
  428.  *                     |                 |
  429.  *                     v                 v
  430.  *                 [type1]          [typen]
  431.  *
  432.  * BUG: Allows two structs to have same name
  433.  */
  434. aggr![int sc, int cv]    /* sc and cv are stored in the BaseTypeQ node built for the aggregate */
  435.         :    <<AST *tr, *base; int t; Sym *typ;>>
  436.             (    "struct"        <<t=tStruct;>>
  437.             |    "union"            <<t=tUnion;>>
  438.             )                    <<base = #[BaseTypeQ,$cv,$sc,t,NULL]; /* name stays NULL unless a tag follows */>>
  439.             (    (    WORD        <<base->data.t.name = strdup($1.text);>>
  440.                 |    TypeName    <<base->data.t.name = strdup($1.text);>>
  441.                 )
  442.                 ( ag[base]        <<#0 = #(NULL, base, #1);
  443.                                   addsym(AggrTag, base->data.t.name, /* only a tagged definition with a body is entered */
  444.                                          0, base, NULL);
  445.                                 >>
  446.                 |                <<#0 = base; /* tag with no body: reference only */>>
  447.                 )
  448.             |    ag[base]        <<#0 = #(NULL, base, #1); /* untagged aggregate: nothing added to the symbol table */>>
  449.             )
  450.         ;
  451.  
  452. /*
  453.  * match a field list for a struct/union
  454.  *
  455.  * The fields are siblings of each other:
  456.  *
  457.  *  [fld1] --> ... --> [fldn] 
  458.  *      |                     |
  459.  *      v                     v
  460.  * [type1]              [typen]
  461.  *
  462.  */
  463. ag![AST *base]    /* base is only forwarded to fdef */
  464.         :    <<AST *t=NULL;>>
  465.             "\{" fdef[$base]    <<#0=t=#2; /* the first field definition heads the returned list */>>
  466.             (    fdef[$base]        <<#(NULL, t, #1); t = #1; /* link the new definition after the previous one */>>
  467.             )*
  468.             "\}"
  469.         ;
  470.  
  471. /*
  472.  * Match one field definition; make the following tree
  473.  *
  474.  * [FieldQ]
  475.  *       |
  476.  *       v
  477.  *  [type1]
  478.  */
  479. fdef![AST *base]    /* the incoming base is overwritten below before it is ever read */
  480.         :    <<int t=tInt; AST *f, *g;>>
  481.             (    type[&t]        <<base = #[BaseTypeQ,0,0,t,$1.text];>>
  482.             |    aggr[scNone,cvNone]    <<base = #1;>>
  483.             )
  484.             field[$base]        <<f = #(#[FieldQ,$2.text], #2); /* FieldQ node with the field's type tree as its child */>>
  485.             (    "," field[$base]<<g = #(#[FieldQ,$2.text], #2);
  486.                                   f = #(NULL, f, g); /* further fields of the same base type become siblings */>>
  487.             )*
  488.             ";"
  489.             <<#0 = f;>>
  490.         ;
  491.  
  492. /* bitfields are recognized, but not handled 'cause not too many people
  493.  * use them
  494.  */
  495. field![AST *base]
  496.         :    declarator[$base] { ":" expr1 } <<#0=#1; $field = $1; /* the optional bitfield width is matched and discarded */>>
  497.         |    ":" expr1    /* NOTE(review): unnamed bitfield sets neither #0 nor $field, yet fdef reads both -- confirm */
  498.         ;
  499.  
  500. /* T y p e  N a m e */
  501.  
  502. tdecl![AST *base]    /* counterpart of declarator for type names */
  503.         :    tdecl1                <<#(bottom(#1), $base); /* hang the base type under bottom(#1) */
  504.                                   #0 = (#1==NULL)?$base:#1; $tdecl=$1;>>
  505.         ;
  506.  
  507. tdecl1! :    <<AST *t; int cv=0;>>
  508.             "\*"
  509.             {    "const"            <<cv=cvConst;>>
  510.             |    "volatile"        <<cv=cvVolatile;>>
  511.             }                    <<t = #[PointerQ,cv];>>
  512.             tdecl1                <<#(bottom(#3), t); #0=(#3==NULL)?t:#3; /* same tree shape as dcltor1 builds */
  513.                                   $tdecl1 = $3;>>
  514.         |    tdecl2                <<#0 = #1; $tdecl1 = $1;>>
  515.         ;
  516.  
  517. tdecl2!    :    <<AST *t=NULL; $tdecl2.text[0] = '\0'; /* default to an empty name; rule typename tests for it */>>
  518.             "\(" tdecl1 "\)"    <<$tdecl2 = $2;>>
  519.             tdecl3                <<#(bottom(#2), #4); #0=(#2==NULL)?#4:#2;>>
  520.         |    WORD tdecl3            <<$tdecl2 = $1; #0 = #2; /* named, as for a prototype argument */>>
  521.         |    tdecl3                <<#0 = #1; /* no name: the attribute keeps the empty text set above */>>
  522.         ;
  523.  
  524. tdecl3!    :    "\[" expr1 "\]" tdecl3<<#0 = #( #[ArrayQ,#2], #4 );>>
  525.         |    "\[" "\]"     tdecl3<<#0 = #( #[ArrayQ,NULL], #3 );>>
  526.         |    "\(" args "\)"        <<#0 = #( NULL, #[FunctionQ], #2 );>>
  527.         |    /* no suffix; unlike dcltor3 this alternative does not assign #0 */
  528.         ;
  529.  
  530.  
  531. /* I n i t  e x p r e s s i o n s */
  532.  
  533.  
  534. initialize    /* an initializer is either a brace list or a single expression */
  535.         :    init2
  536.         |    expr0
  537.         ;
  538.  
  539. /* Build an initialization expression-tree of the form:
  540.  *
  541.  * Single-dimensioned array or structure:
  542.  *
  543.  *    "{"
  544.  *     |
  545.  *     v
  546.  *    [exp1] --> ... --> [expn]
  547.  *
  548.  * Nested structure or multi-dim array:
  549.  *
  550.  *    "{"
  551.  *     |
  552.  *     v
  553.  *    "{" --> ... --> "{"
  554.  *     |                ...
  555.  *     v                ...
  556.  *    [exp1] --> ... --> [expn]
  557.  */
  558. init2    :    "\{"^ init3 ( ","! init3 )* {","!} "\}"!    /* "{" is the subtree root; commas and "}" are kept out of the tree; a trailing comma is allowed */
  559.         ;
  560.  
  561. init3    :    init2    /* nested brace list */
  562.         |    expr1
  563.         ;
  564.